library(tidyverse)
library(biomaRt)
library(ggrepel)
library(clusterProfiler)
# Parallel
library(BiocParallel)
register(MulticoreParam(6))
load('../data/microarray_NGS_objects.Rdata')
load('../data/top_tables.Rdata')
sva_counts <- read_tsv('../data/sva_counts.tsv.gz')
Rows: 14318 Columns: 66
ββ Column specification ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Delimiter: "\t"
chr (1): Gene
dbl (65): GSM2944692, GSM2944693, GSM2944694, GSM2944695, GSM2944696, GSM2944697, GSM2944698, GSM2944699, GSM2944700, GSM327854, GSM327855, GSM327856, GSM327857, GSM327858, GSM327859...
βΉ Use `spec()` to retrieve the full column specification for this data.
βΉ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sample_meta_D <- sample_meta %>% filter(Sample %in% colnames(sva_counts)) %>%
dplyr::select(Sample:Section, Layout:Fusion) %>%
mutate(S2 = case_when(Section == 'OF' ~ 'OF', TRUE ~ 'OC')) %>%
unique()
box_marker <- function(table, genes, section = c('OF','OC')){
if ('matrix' %in% class(table)){
table <- table %>%
as_tibble(rownames = 'Gene')
}
table %>%
pivot_longer(-Gene, names_to = 'Sample', values_to = 'Expression') %>%
mutate(Sample = gsub('_.*|.CEL.*','',Sample)) %>%
left_join(sample_meta_D) %>%
mutate(S2 = case_when(Section == 'OF' ~ 'OF',TRUE ~ 'OC')) %>%
filter(Gene %in% genes, S2 %in% section) %>%
#filter(Gene %in% row.names(top.table_OF_AD %>% head(10))) %>%
mutate(Fusion = factor(Fusion, levels = c('Before','During','After'))) %>%
mutate(OrgTech = paste(Organism, Technology, sep = '_')) %>%
ggplot(aes(x=Fusion, y=Expression, color = Organism, shape = Technology)) +
# geom_boxplot(aes(group = Fusion), color = 'Black', outlier.colour = NA) +
# geom_boxplot(aes(group = interaction(Organism,Fusion)), outlier.colour = NA) +
ggbeeswarm::geom_quasirandom(size = 3, alpha = 0.7) +
cowplot::theme_cowplot() +
facet_grid(~Gene + S2, scales = 'free_y') +
ggsci::scale_color_aaas() +
ylab('log2 (corrected counts)') +
stat_summary(fun=mean, geom = 'line', aes(group = OrgTech, color = Organism))
}
volcano_maker <- function(df,
title="Volcano Plot",
pvalue='P.Value',
padj='adj.P.Val',
logFC='logFC',
gene_list = ''){
df$pvalue <- df[,pvalue]
df$log2FoldChange <- df[,logFC]
df$padj <- df[,padj]
df$Gene <- row.names(df)
df <- df[!is.na(df$pvalue),]
print(dim(df))
df <- df %>% mutate(Class = case_when(padj < 0.05 & abs(logFC) > 1~ "FDR < 0.05 & logFC > 1",
padj < 0.1 & abs(logFC) > 1 ~ 'FDR < 0.1 & logFC > 1',
TRUE ~ 'Not significant'))
df$GeneT <- df$Gene
if (gene_list == ''){
gene_list <- df %>% filter(padj < 0.05) %>% pull(Gene) %>% head(10)
}
df$Gene[!df$Gene %in% gene_list] <- ''
plot <- ggplot(data=df,aes(label=Gene, x = log2FoldChange, y = -log10(pvalue))) +
geom_point(aes(colour=Class)) +
scale_colour_manual(values=c("darkred", "red", "grey")) +
cowplot::theme_cowplot() +
geom_vline(aes(xintercept=-1),linetype="dotted") +
geom_vline(aes(xintercept=1),linetype="dotted") +
geom_vline(aes(xintercept=-2),linetype="dotted") +
geom_vline(aes(xintercept=2),linetype="dotted") +
geom_label_repel(max.overlaps = 100) +
xlab('logFC') + ylab('-log10(p value)') +
ggtitle(title) + cowplot::theme_cowplot()
plot
}
2021-12-06
Negative means that expression is dropping from as the fissure goes from βbeforeβ to βduring.β This test is designed to identify which genes are changing across time in the optic fissure region.
volcano_maker(top.table_OF_DB, title = 'Optic Fissure - Before/During',
gene_list = c(top.table_OF_DB %>% filter(logFC > 0) %>% head(5) %>% row.names(),
top.table_OF_DB %>% filter(logFC < 0) %>% head(5) %>% row.names()))
[1] 14318 10
Warning in if (gene_list == "") { :
the condition has length > 1 and only the first element will be used
All genes with an FDR < 0.2. Not very many. There are a limited number of samples before the fusion begins (only ZEBRAFISH in fact!) compared to the other time points.
top.table_OF_DB %>% as_tibble(rownames = 'Gene') %>% filter(adj.P.Val < 0.1) %>% DT::datatable()
Colored by organism. Each line is drawn for organism / technology (remember, mouse has both microarray and RNA-seq).
box_marker(sva_counts,
genes = top.table_OF_DB %>%
as_tibble(rownames = 'Gene') %>%
filter(adj.P.Val < 0.05) %>% head(6) %>% pull(Gene),
section = 'OF')
Joining, by = "Sample"
box_marker(sva_counts,
genes = top.table_OF_DB %>%
as_tibble(rownames = 'Gene') %>%
filter(adj.P.Val < 0.05) %>% head(6) %>% pull(Gene),
section = 'OC')
Joining, by = "Sample"
GSEA uses a ranked list of genes by logFC. So the p values are not used in this situation. The order is. So the GSEA is useful in situations where there are very few differentially expressed genes. We see here the suppressed (negative expressed genes) terms are relating to mitosis / cell division. The activated gene terms are relating to melanocytes/pigment and ion channels.
dotplot(gseF, showCategory=15, split=".sign") + facet_grid(.~.sign) + cowplot::theme_cowplot()
wrong orderBy parameter; set to default `orderBy = "x"`
So you can see the genes in the ontology term. The genes get βincludedβ as enriched if GSEA deems them to be ranked unusually high.
gse@result %>% as_tibble() %>% arrange(-abs(NES)) %>% filter(p.adjust < 0.05) %>% DT::datatable()
GO enrichment uses a cutoff between differentially expressed genes (FDR < 0.1 in this case) and everything else.
library(clusterProfiler)
library(enrichplot)
diff_genes <- top.table_OF_DB %>% as_tibble(rownames = 'Gene') %>% filter(adj.P.Val < 0.1)
eg_diff_genes <- bitr(diff_genes$Gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
'select()' returned 1:1 mapping between keys and columns
eg_diff_genes <- diff_genes %>% left_join(., eg_diff_genes, by = c('Gene' = 'SYMBOL'))
eg_universe = bitr(top.table_OF_DB %>% as_tibble(rownames = 'Gene') %>% pull(Gene), fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
'select()' returned 1:many mapping between keys and columns
Warning in bitr(top.table_OF_DB %>% as_tibble(rownames = "Gene") %>% pull(Gene), :
0.01% of input gene IDs are fail to map...
egoOF_DB <- enrichGO(gene = eg_diff_genes$ENTREZID,
universe = eg_universe$ENTREZID,
OrgDb = org.Hs.eg.db,
ont = "all",
readable = TRUE)
p1 <- dotplot(egoOF_DB, showCategory=20) + ggtitle("Dotplot for GO, OF")
wrong orderBy parameter; set to default `orderBy = "x"`
p1
NA
NA
So you can see the genes in the ontology term.
egoOF_DB@result %>% as_tibble() %>% filter(p.adjust < 0.05) %>% DT::datatable()
Relationships between related GO terms with shared genes
geneList <- eg_diff_genes$logFC
names(geneList) <- eg_diff_genes$Gene
cnet <- cnetplot(egoOF_DB, foldChange = geneList, showCategory = 12) + scale_color_viridis_c(name = 'log2(FoldChange)')
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
cnet
# system("wget https://wikipathways-data.wmcloud.org/current/gmt/wikipathways-20211110-gmt-Homo_sapiens.gmt")
wp2gene <- read.gmt('wikipathways-20211110-gmt-Homo_sapiens.gmt')
wp2gene <- wp2gene %>% tidyr::separate(term, c("name","version","wpid","org"), "%")
wpid2gene <- wp2gene %>% dplyr::select(wpid, gene) #TERM2GENE
wpid2name <- wp2gene %>% dplyr::select(wpid, name) #TERM2NAME
ewp <- enricher(eg_diff_genes$ENTREZID,
TERM2GENE = wpid2gene,
TERM2NAME = wpid2name,
pvalueCutoff = 0.1)
ewp_plot <- dotplot(ewp, showCategory=30) + ggtitle("Dotplot for WikiPathways")
wrong orderBy parameter; set to default `orderBy = "x"`
ewp_plot
kk <- enrichKEGG(gene = eg_diff_genes$ENTREZID,
universe = eg_universe$ENTREZID,
organism = 'hsa')
dotplot(kk) + ggtitle("KEGG Pathway Enrichment")
wrong orderBy parameter; set to default `orderBy = "x"`
devtools::session_info()
β Session info ππΎ πΉπ΄ π βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
hash: person in bed: medium-dark skin tone, flag: Tonga, kaaba
setting value
version R version 4.0.5 (2021-03-31)
os macOS Catalina 10.15.7
system x86_64, darwin17.0
ui RStudio
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/New_York
date 2021-12-06
rstudio 2021.09.0+351 Ghost Orchid (desktop)
pandoc 2.14.0.3 @ /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)
β Packages βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
package * version date (UTC) lib source
annotate 1.68.0 2020-10-27 [1] Bioconductor
AnnotationDbi * 1.52.0 2020-10-27 [1] Bioconductor
askpass 1.1 2019-01-13 [1] CRAN (R 4.0.2)
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
backports 1.3.0 2021-10-27 [1] CRAN (R 4.0.2)
beeswarm 0.4.0 2021-06-01 [1] CRAN (R 4.0.2)
Biobase * 2.50.0 2020-10-27 [1] Bioconductor
BiocFileCache 1.14.0 2020-10-27 [1] Bioconductor
BiocGenerics * 0.36.1 2021-04-16 [1] Bioconductor
BiocManager 1.30.16 2021-06-15 [1] CRAN (R 4.0.2)
BiocParallel * 1.24.1 2020-11-06 [1] Bioconductor
biomaRt * 2.46.3 2021-02-11 [1] Bioconductor
bit 4.0.4 2020-08-04 [1] CRAN (R 4.0.2)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.0.2)
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.0.2)
blob 1.2.2 2021-07-23 [1] CRAN (R 4.0.2)
broom 0.7.10 2021-10-31 [1] CRAN (R 4.0.2)
bslib 0.3.1 2021-10-06 [1] CRAN (R 4.0.2)
cachem 1.0.6 2021-08-19 [1] CRAN (R 4.0.2)
callr 3.7.0 2021-04-20 [1] CRAN (R 4.0.2)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.2)
cli 3.1.0 2021-10-27 [1] CRAN (R 4.0.2)
clusterProfiler * 3.18.1 2021-02-11 [1] Bioconductor
colorspace 2.0-2 2021-06-24 [1] CRAN (R 4.0.2)
cowplot 1.1.1 2020-12-30 [1] CRAN (R 4.0.2)
crayon 1.4.2 2021-10-29 [1] CRAN (R 4.0.2)
crosstalk 1.1.1 2021-01-12 [1] CRAN (R 4.0.2)
curl 4.3.2 2021-06-23 [1] CRAN (R 4.0.2)
data.table 1.14.2 2021-09-27 [1] CRAN (R 4.0.2)
DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.2)
dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.0.2)
DelayedArray 0.16.3 2021-03-24 [1] Bioconductor
desc 1.4.0 2021-09-28 [1] CRAN (R 4.0.2)
devtools 2.4.2 2021-06-07 [1] CRAN (R 4.0.2)
digest 0.6.28 2021-09-23 [1] CRAN (R 4.0.2)
DO.db 2.9 2020-11-19 [1] Bioconductor
DOSE 3.16.0 2020-10-27 [1] Bioconductor
downloader 0.4 2015-07-09 [1] CRAN (R 4.0.2)
dplyr * 1.0.7 2021-06-18 [1] CRAN (R 4.0.2)
DT 0.19 2021-09-02 [1] CRAN (R 4.0.2)
edgeR 3.32.1 2021-01-14 [1] Bioconductor
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.2)
enrichplot * 1.10.2 2021-01-28 [1] Bioconductor
evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.1)
fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.2)
farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.2)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.2)
fastmatch 1.1-3 2021-07-23 [1] CRAN (R 4.0.2)
fgsea 1.16.0 2020-10-27 [1] Bioconductor
forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.2)
fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
genefilter 1.72.1 2021-01-21 [1] Bioconductor
generics 0.1.1 2021-10-25 [1] CRAN (R 4.0.2)
GenomeInfoDb 1.26.7 2021-04-08 [1] Bioconductor
GenomeInfoDbData 1.2.4 2020-10-07 [1] Bioconductor
GenomicRanges 1.42.0 2020-10-27 [1] Bioconductor
ggbeeswarm 0.6.0 2017-08-07 [1] CRAN (R 4.0.2)
ggforce 0.3.3 2021-03-05 [1] CRAN (R 4.0.2)
ggfun 0.0.4 2021-09-17 [1] CRAN (R 4.0.2)
ggnewscale 0.4.5 2021-01-11 [1] CRAN (R 4.0.2)
ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.0.2)
ggraph 2.0.5 2021-02-23 [1] CRAN (R 4.0.2)
ggrepel * 0.9.1 2021-01-15 [1] CRAN (R 4.0.2)
ggsci 2.9 2018-05-14 [1] CRAN (R 4.0.2)
glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
GO.db 3.12.1 2020-11-19 [1] Bioconductor
GOSemSim 2.16.1 2020-10-29 [1] Bioconductor
graphlayouts 0.7.1 2020-10-26 [1] CRAN (R 4.0.2)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.0.2)
gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.2)
haven 2.4.3 2021-08-04 [1] CRAN (R 4.0.2)
hms 1.1.1 2021-09-26 [1] CRAN (R 4.0.2)
htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.0.2)
htmlwidgets 1.5.4 2021-09-08 [1] CRAN (R 4.0.2)
httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
igraph 1.2.7 2021-10-15 [1] CRAN (R 4.0.2)
IRanges * 2.24.1 2020-12-12 [1] Bioconductor
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.2)
jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
knitr 1.36 2021-09-29 [1] CRAN (R 4.0.2)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.2)
lattice 0.20-45 2021-09-22 [1] CRAN (R 4.0.2)
lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.0.2)
limma 3.46.0 2020-10-27 [1] Bioconductor
locfit 1.5-9.4 2020-03-25 [1] CRAN (R 4.0.2)
lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.0.2)
magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
MASS 7.3-54 2021-05-03 [1] CRAN (R 4.0.2)
Matrix 1.3-4 2021-06-01 [1] CRAN (R 4.0.2)
MatrixGenerics 1.2.1 2021-01-30 [1] Bioconductor
matrixStats 0.61.0 2021-09-17 [1] CRAN (R 4.0.2)
memoise 2.0.0 2021-01-26 [1] CRAN (R 4.0.2)
mgcv 1.8-38 2021-10-06 [1] CRAN (R 4.0.2)
modelr 0.1.8 2020-05-19 [1] CRAN (R 4.0.2)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.2)
nlme 3.1-153 2021-09-07 [1] CRAN (R 4.0.2)
openssl 1.4.5 2021-09-02 [1] CRAN (R 4.0.2)
org.Hs.eg.db * 3.12.0 2020-11-19 [1] Bioconductor
pillar 1.6.4 2021-10-18 [1] CRAN (R 4.0.2)
pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.0.2)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
pkgload 1.2.3 2021-10-13 [1] CRAN (R 4.0.2)
plyr 1.8.6 2020-03-03 [1] CRAN (R 4.0.2)
polyclip 1.10-0 2019-03-14 [1] CRAN (R 4.0.2)
preprocessCore 1.52.1 2021-01-08 [1] Bioconductor
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.2)
processx 3.5.2 2021-04-30 [1] CRAN (R 4.0.2)
progress 1.2.2 2019-05-16 [1] CRAN (R 4.0.2)
ps 1.6.0 2021-02-28 [1] CRAN (R 4.0.2)
purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
qsmooth * 1.6.0 2020-10-27 [1] Bioconductor
qvalue 2.22.0 2020-10-27 [1] Bioconductor
R6 2.5.1 2021-08-19 [1] CRAN (R 4.0.2)
rappdirs 0.3.3 2021-01-31 [1] CRAN (R 4.0.2)
RColorBrewer 1.1-2 2014-12-07 [1] CRAN (R 4.0.2)
Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.2)
RCurl 1.98-1.5 2021-09-17 [1] CRAN (R 4.0.2)
readr * 2.0.2 2021-09-27 [1] CRAN (R 4.0.2)
readxl 1.3.1 2019-03-13 [1] CRAN (R 4.0.2)
remotes 2.4.1 2021-09-29 [1] CRAN (R 4.0.2)
reprex 2.0.1 2021-08-05 [1] CRAN (R 4.0.2)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.0.2)
rlang 0.4.12 2021-10-18 [1] CRAN (R 4.0.2)
rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.0.5)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
RSQLite 2.2.8 2021-08-21 [1] CRAN (R 4.0.2)
rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.2)
rvcheck 0.2.1 2021-10-22 [1] CRAN (R 4.0.2)
rvest 1.0.2 2021-10-16 [1] CRAN (R 4.0.2)
S4Vectors * 0.28.1 2020-12-09 [1] Bioconductor
sass 0.4.0 2021-05-12 [1] CRAN (R 4.0.2)
scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.2)
scatterpie 0.1.7 2021-08-20 [1] CRAN (R 4.0.2)
sessioninfo 1.2.1 2021-11-02 [1] CRAN (R 4.0.5)
shadowtext 0.0.9 2021-09-19 [1] CRAN (R 4.0.2)
stringi 1.7.5 2021-10-04 [1] CRAN (R 4.0.2)
stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
SummarizedExperiment 1.20.0 2020-10-27 [1] Bioconductor
survival 3.2-13 2021-08-24 [1] CRAN (R 4.0.2)
sva 3.38.0 2020-10-27 [1] Bioconductor
testthat 3.1.0 2021-10-04 [1] CRAN (R 4.0.2)
tibble * 3.1.5 2021-09-30 [1] CRAN (R 4.0.2)
tidygraph 1.2.0 2020-05-12 [1] CRAN (R 4.0.2)
tidyr * 1.1.4 2021-09-27 [1] CRAN (R 4.0.2)
tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.2)
tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.0.2)
tweenr 1.0.2 2021-03-23 [1] CRAN (R 4.0.2)
tzdb 0.2.0 2021-10-27 [1] CRAN (R 4.0.2)
usethis 2.1.3 2021-10-27 [1] CRAN (R 4.0.2)
utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.2)
vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.2)
vipor 0.4.5 2017-03-22 [1] CRAN (R 4.0.2)
viridis 0.6.2 2021-10-13 [1] CRAN (R 4.0.2)
viridisLite 0.4.0 2021-04-13 [1] CRAN (R 4.0.2)
vroom 1.5.5 2021-09-14 [1] CRAN (R 4.0.2)
withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.2)
xfun 0.27 2021-10-18 [1] CRAN (R 4.0.2)
XML 3.99-0.8 2021-09-17 [1] CRAN (R 4.0.2)
xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.2)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.0.2)
XVector 0.30.0 2020-10-28 [1] Bioconductor
yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
yulab.utils 0.0.4 2021-10-09 [1] CRAN (R 4.0.2)
zlibbioc 1.36.0 2020-10-28 [1] Bioconductor
[1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ